In [1]:
    
import pandas as pd
import numpy as np
    
In [2]:
    
# Load the tab-separated sales file. The first row supplies the column names
# and the column at position 2 is parsed as dates.
sales = pd.read_csv(
    'foodmart.sales.tsv',
    sep='\t',
    header=0,
    parse_dates=[2],
)
    
In [3]:
    
# Preview the first five rows of the freshly loaded sales data.
sales.head(n=5)
    
    Out[3]:
In [4]:
    
# Load the tab-separated product catalogue; the first row supplies the
# column names.
products = pd.read_csv(
    'foodmart.products.tsv',
    sep='\t',
    header=0,
)
    
In [5]:
    
# Preview the first five rows of the product catalogue.
products.head(n=5)
    
    Out[5]:
In [6]:
    
# Attach a product name to every sale by joining on product_id. The inner
# join keeps only sales rows whose product_id is present in `products`.
#
# Any product_name column already on `sales` is dropped first so this cell
# can be re-run safely: because it reassigns `sales`, a second run would
# otherwise merge again and yield product_name_x / product_name_y, breaking
# later lookups of 'product_name'.
sales = (
    sales
    .drop(columns=['product_name'], errors='ignore')
    .merge(products[['product_id', 'product_name']],
           on=['product_id'], how='inner')
)
    
In [7]:
    
# Preview the first five rows of `sales` at this point in the notebook.
sales.head(n=5)
    
    Out[7]:
In [8]:
    
# Reshape to a wide table: one row per (date, store_id) pair and one column
# per product_name. Cells hold the aggregated 'sales' value (pandas' default
# aggregation is used), and combinations with no data are filled with 0.
sparse_sales = sales.pivot_table(
    values='sales',
    index=['date', 'store_id'],
    columns=['product_name'],
    fill_value=0,
)
    
In [9]:
    
# Preview the first five rows of the wide (date, store) x product table.
sparse_sales.head(n=5)
    
    Out[9]:
In [10]:
    
# Pairwise Pearson correlation between the product columns, giving a square
# product x product matrix.
sales_correlation = sparse_sales.corr(method='pearson')
    
In [11]:
    
# Preview the first five rows of the correlation matrix.
sales_correlation.head(n=5)
    
    Out[11]:
In [14]:
    
# Show the five products whose sales correlate least with the chosen product
# (ascending sort, so the smallest correlations come first).
product_name = 'American Chicken Hot Dogs'
(
    sales_correlation
    .loc[:, [product_name]]
    .sort_values(by=product_name, ascending=True)
    .head(n=5)
)
    
    Out[14]:
In [15]:
    
# For every product, take the smallest correlation found in its column, store
# it in a one-column frame named 'min', and show the five lowest values.
min_corr = sales_correlation.min().to_frame(name='min')
min_corr.sort_values(by='min', ascending=True).head(n=5)
    
    Out[15]:
In [16]:
    
# For every product, find its highest correlation with any *other* product.
#
# The diagonal of the correlation matrix is each product's correlation with
# itself, so it is masked out by position before taking the row-wise maximum.
# Masking by position (rather than discarding values equal to 1.0) keeps
# genuine perfect correlations between two different products, and still
# removes a self-correlation that is not exactly 1.0 due to rounding.
# The vectorised max also avoids passing a lazy `filter` iterator to np.max,
# which only works when `filter` returns a list.
is_self_pair = np.eye(len(sales_correlation), dtype=bool)
max_corr = (
    sales_correlation
    .mask(is_self_pair)      # diagonal -> NaN
    .max(axis=1)             # NaN entries are skipped
    .to_frame(name='max')
)
max_corr.sort_values(by='max', ascending=False).head()
    
    Out[16]:
In [17]:
    
# Show the five strongest correlations for the chosen product (descending
# sort, so the largest correlations come first).
product_name = 'Plato French Roast Coffee'
(
    sales_correlation
    .loc[:, [product_name]]
    .sort_values(by=product_name, ascending=False)
    .head(n=5)
)
    
    Out[17]: